options(timeout=180)
library(ggspavis)
library(openxlsx)
library(dplyr)
library(scran)
library(scater)
library(TENxVisiumData)
library(SpatialExperiment)
library(SpatialFeatureExperiment)
library(Voyager)
library(patchwork)
library(nnSVG)
library(GSVA)
library(BiocParallel)
spe <- MouseBrainSagittalAnterior()
spe <- spe[, colData(spe)$sample_id == "MouseBrainSagittalAnterior1"]
Subset to keep only spots over tissue
spe <- spe[, int_colData(spe)$spatialData$in_tissue == TRUE]
Identify mitochondrial genes
is_mito <- grepl("(^MT-)|(^mt-)", rowData(spe)$symbol)
spe <- addPerCellQC(spe, subsets = list(mito = is_mito))
Select QC thresholds sum: Library size (represents the total sum of UMI counts per spot). detected: The number of expressed features (refers to the number of genes with non-zero UMI counts per spot). subsets_mito_percent: A high proportion of mitochondrial reads indicates cell damage.
To select the QC thresholds we first plot them and choose the thresholds visually.
hist(colData(spe)$sum, breaks = 20, col = "skyblue", border = "white",
xlab = "UMI Counts per Spot", ylab = "Frequency",
main = "Distribution of UMI Counts per Spot")
grid(col = "gray", lwd = 0.5)
hist(colData(spe)$detected, breaks = 20, col = "skyblue", border = "white",
xlab = "Number of genes with non-zero UMI counts per spot", ylab = "Frequency",
main = "Distribution of number of genes with non-zero UMI counts per spot")
grid(col = "gray", lwd = 0.5)
hist(colData(spe)$subsets_mito_percent, breaks = 20, col = "skyblue", border = "white",
xlab = "Percentage of mitochondrial reads", ylab = "Frequency",
main = "Distribution of percentage of mitochondrial reads")
grid(col = "gray", lwd = 0.5)
qc_lib_size <- colData(spe)$sum < 2000
qc_detected <- colData(spe)$detected < 1000
qc_mito <- colData(spe)$subsets_mito_percent > 30
Number of discarded spots for each metric
apply(cbind(qc_lib_size, qc_detected, qc_mito), 2, sum)
## qc_lib_size qc_detected qc_mito
## 26 23 14
discard <- qc_lib_size | qc_detected | qc_mito
table(discard)
## discard
## FALSE TRUE
## 2655 40
colData(spe)$discard <- discard
Plot discarded spots
# check spatial pattern of combined set of discarded spots
plotVisium(spe, x_coord = "pxl_row_in_fullres", y_coord = "pxl_col_in_fullres",
fill = "discard")
Filter low-quality spots
spe <- spe[, !colData(spe)$discard]
Calculate log-transformed normalized counts (abbreviated as “logcounts”) using library size factors.
spe <- computeLibraryFactors(spe)
spe <- logNormCounts(spe)
spe <- spe[!is_mito, ]
Now, we keep only highly variable genes.
# fit mean-variance relationship
dec <- modelGeneVar(spe, assay.type = "logcounts")
# select top HVGs
top_hvgs <- getTopHVGs(dec, prop = 0.9)
spe <- spe[rownames(spe) %in% top_hvgs,]
fit <- metadata(dec)
plot(fit$mean, fit$var,
xlab = "mean of log-expression", ylab = "variance of log-expression")
curve(fit$trend(x), col = "dodgerblue", add = TRUE, lwd = 2)
Compute PCA
set.seed(123)
spe <- runPCA(spe, subset_row = top_hvgs)
reducedDimNames(spe)
## [1] "PCA"
Perform graph-based clustering
set.seed(123)
k <- 28
g <- buildSNNGraph(spe, k = k, use.dimred = "PCA")
g_walk <- igraph::cluster_walktrap(g)
clus <- g_walk$membership
table(clus)
## clus
## 1 2 3 4 5 6 7 8 9
## 424 210 320 581 476 314 129 78 123
colLabels(spe) <- factor(clus)
Cellmarker data from http://bio-bigdata.hrbmu.edu.cn/CellMarker/CellMarker_download.html
cellmarkers <- read.xlsx('Cell_marker_Mouse.xlsx', rowNames =F)
Extract Gene Set info from the xlsx file
cell_types <- cellmarkers %>% filter(tissue_class=='Brain' & cell_type=="Normal cell") %>% dplyr::count(cell_name) %>% arrange(desc(n)) %>% filter(n>1) %>% select(cell_name)
cell_types <- cell_types$cell_name
# filter genes symbol info on the xlsx file
cm_sets <- lapply(cell_types, function(x) cellmarkers %>%
filter(tissue_class=='Brain' &
cell_type=="Normal cell" &
cell_name==x &
!(is.na(Symbol))) %>% pull(Symbol) %>% unique())
cell_types <- unique(make.names(cell_types))
names(cm_sets) <- cell_types
set_names <- names(cm_sets)
Create list with geneset names from SpatialExperiment object
genesetlist <- lapply(set_names, function(x) {
geneset <- rownames(rowData(spe)[rowData(spe)$symbol %in% cm_sets[[x]], ,drop=FALSE])
})
names(genesetlist) <- set_names
genesetlist <- genesetlist[sapply(genesetlist, function(x) length(x) > 1)]
# Function for comparison between GSVA scores and clusters
source("gsva_cluster_tests.R")
SpatialExperiment object with GSVA scores.
gsvaPar <- gsvaParam(spe, genesetlist, assay = "logcounts", kcdf="none", minSize = 2)
system.time(res <- gsva(gsvaPar, BPPARAM=MulticoreParam(workers=4)))
## Warning in .filterGenes(dataMatrix, removeConstant = removeConstant,
## removeNzConstant = removeNzConstant): 144 genes with constant non-zero values
## throughout the sample.
## Warning in .filterGenes(dataMatrix, removeConstant = removeConstant,
## removeNzConstant = removeNzConstant): Genes with constant non-zero values are
## discarded.
## Setting parallel calculations through a MulticoreParam back-end with workers=4 and tasks=100.
## Estimating GSVA scores for 144 gene sets.
## Estimating ECDFs directly
## Estimating ECDFs in parallel on 4 cores
## user system elapsed
## 52.940 35.540 41.379
We coerce the SpatialExperiment object to SpatialFeatureExperiment object in order to use the package Voyager functionalities.
gsva_sfe <- toSpatialFeatureExperiment(res)
colGraph(gsva_sfe, "visium", sample_id = "MouseBrainSagittalAnterior1") <- findVisiumGraph(gsva_sfe, sample_id = "MouseBrainSagittalAnterior1", zero.policy = TRUE)
assays(gsva_sfe)$logcounts <- assays(gsva_sfe)$es
moran_res <- runUnivariate(gsva_sfe, type = "moran", colGraphName = "visium")
rowData(moran_res)[order(rowData(moran_res)$moran_MouseBrainSagittalAnterior1,decreasing = TRUE),][1:10,2:3]
## DataFrame with 10 rows and 2 columns
## moran_MouseBrainSagittalAnterior1
## <numeric>
## Myelinating.stem.cell 0.843874
## Striatopallidal.loop.cell 0.815366
## Vascular.leptomeningeal.cell 0.814274
## Synaptic.cell 0.784577
## Myelinating.oligodendrocyte 0.771354
## Early.GABAergic.neuron 0.760723
## D1.Medium.spiny.neuron.D1.MSN. 0.756597
## Astrocyte 0.749458
## Satellite.glial.cell 0.748608
## Oligodendrocyte 0.721197
## K_MouseBrainSagittalAnterior1
## <numeric>
## Myelinating.stem.cell 1.56658
## Striatopallidal.loop.cell 4.16227
## Vascular.leptomeningeal.cell 4.82437
## Synaptic.cell 5.09456
## Myelinating.oligodendrocyte 4.28530
## Early.GABAergic.neuron 3.38674
## D1.Medium.spiny.neuron.D1.MSN. 6.13580
## Astrocyte 4.34787
## Satellite.glial.cell 4.28117
## Oligodendrocyte 10.92995
spe_nnSVG <- nnSVG(res, assay_name = "es", BPPARAM=MulticoreParam(workers=4))
rowData(spe_nnSVG)[order(rowData(spe_nnSVG)$rank,decreasing = TRUE),][1:10,2:15]
## DataFrame with 10 rows and 14 columns
## sigma.sq tau.sq phi loglik
## <numeric> <numeric> <numeric> <numeric>
## Disease.associated.microglial.cell 0.005727859 0.03935095 7.36083e+01 352.401
## M1.macrophage 0.000395995 0.00352343 4.93239e+01 3597.986
## Monocyte.derived.macrophage 0.000187491 0.00343928 7.05622e+00 3708.621
## Early.intermediate.precursor.cell 0.000789102 0.01466384 4.68450e+00 1786.319
## Vascular.endothelial.cell 0.001728764 0.01816830 1.15604e+01 1464.384
## Border.associated.macrophage 0.000530252 0.00316531 3.07233e+01 3700.688
## Late.GABAergic.neuron 0.001024825 0.01441191 6.59090e-05 1806.802
## Hemogenic.endothelial.cell 0.000993726 0.01065153 2.08556e+00 2193.264
## Intermediate.GABAergic.neuron 0.001101066 0.01134260 6.18977e+00 2098.128
## Plasmacytoid.dendritic.cell.pDC. 0.000547286 0.00413581 9.49803e+00 3410.168
## runtime mean var spcov
## <numeric> <numeric> <numeric> <numeric>
## Disease.associated.microglial.cell 3.008 NA NA NA
## M1.macrophage 2.529 NA NA NA
## Monocyte.derived.macrophage 1.523 NA NA NA
## Early.intermediate.precursor.cell 1.640 NA NA NA
## Vascular.endothelial.cell 2.303 NA NA NA
## Border.associated.macrophage 2.050 NA NA NA
## Late.GABAergic.neuron 1.288 NA NA NA
## Hemogenic.endothelial.cell 2.674 NA NA NA
## Intermediate.GABAergic.neuron 1.920 NA NA NA
## Plasmacytoid.dendritic.cell.pDC. 0.570 NA NA NA
## prop_sv loglik_lm LR_stat rank
## <numeric> <numeric> <numeric> <numeric>
## Disease.associated.microglial.cell 0.1270632 347.042 10.7175 144
## M1.macrophage 0.1010341 3589.585 16.8027 143
## Monocyte.derived.macrophage 0.0516962 3692.630 31.9816 142
## Early.intermediate.precursor.cell 0.0510649 1768.274 36.0911 141
## Vascular.endothelial.cell 0.0868854 1436.326 56.1164 140
## Border.associated.macrophage 0.1434835 3672.069 57.2372 139
## Late.GABAergic.neuron 0.0663887 1775.391 62.8235 138
## Hemogenic.endothelial.cell 0.0853331 2153.137 80.2558 137
## Intermediate.GABAergic.neuron 0.0884840 2055.406 85.4442 136
## Plasmacytoid.dendritic.cell.pDC. 0.1168643 3353.784 112.7672 135
## pval padj
## <numeric> <numeric>
## Disease.associated.microglial.cell 4.70688e-03 4.70688e-03
## M1.macrophage 2.24568e-04 2.26138e-04
## Monocyte.derived.macrophage 1.13575e-07 1.15175e-07
## Early.intermediate.precursor.cell 1.45517e-08 1.48613e-08
## Vascular.endothelial.cell 6.52367e-13 6.71006e-13
## Border.associated.macrophage 3.72480e-13 3.85878e-13
## Late.GABAergic.neuron 2.27596e-14 2.37491e-14
## Hemogenic.endothelial.cell 0.00000e+00 0.00000e+00
## Intermediate.GABAergic.neuron 0.00000e+00 0.00000e+00
## Plasmacytoid.dendritic.cell.pDC. 0.00000e+00 0.00000e+00
top_nnSVG_genesets <- rownames(rowData(spe_nnSVG)[order(rowData(spe_nnSVG)$rank,decreasing = FALSE),][1:10,])
plot_list_hist_gsva_nnsvg <- lapply(top_nnSVG_genesets, function(x){
pl <- list(x,plotVisium(res,y_coord = "pxl_col_in_fullres", x_coord = "pxl_row_in_fullres", fill = x, assay = "es",sample_ids=NULL) + ggtitle(paste0(x," GSVA scores superposed in histological image" ))+ theme(text = element_text(size = 7)))
pl
})
for (i in 1:length(plot_list_hist_gsva_nnsvg)){
print(plot_list_hist_gsva_nnsvg[i][[1]][[2]])
#print(gsva_clusters_ttests(spe,res,plot_list_hist_gsva_nnsvg[i][[1]][[1]]))
}
most_corr<-most_cor_geneset(spe,res)
corr_plots <- lapply(1:length(unique(colData(spe)$label)), function(x){
pl <- plotVisium(res,y_coord = "pxl_col_in_fullres", x_coord = "pxl_row_in_fullres", fill = most_corr[x], assay = "es",sample_ids=NULL) + labs(subtitle=NULL)+ggtitle(paste0(most_corr[x]," GSVA scores (most correlated with cluster ",x,")" ))+ theme(text = element_text(size = 7))
})
for (i in 1:length(corr_plots)){
print(corr_plots[[i]]+cluster_plot)
}